* Start from a clean session: close any log left open by an earlier run,
* drop everything held in memory and switch off output paging.
capture log close
clear all
set more off

********************************************************************************
***** Project: The Short and Long Term Effects of In-Person Performance Feedback
********************************************************************************
***** A. R. Soetevent & G. J. Romensen
********************************************************************************
***** Creating Additional Variables used in Basic regressions
********************************************************************************
********************************************************************************
***** Latest update: 06-09-2021
********************************************************************************
** Uses: dta-files generated in 6-Preparingforanalysis.do
********************************************************************************
* global filepath "C:\JPEMicReplication"
* global paperpath "$filepath\TablesGraphs"

* The project root is expected in the global macro filepath (see the
* commented-out example above). Stop with a clear message when it is not
* set instead of failing later on a malformed path.
if `"$filepath"' == "" {
	display as error "global macro filepath is not set; define it before running this do-file"
	exit 198
}
log using "$filepath/Logs/10PreparationForAnalysis.log", replace

********************************************************************************
*** DATA FRIESLAND
********************************************************************************

/*** Notes ***/
* No fuel economy observations postfeedback in urban area.

* Forward slashes work as directory separator on every platform Stata runs on
* and match the log path above.
use "$filepath/ConstructedData/analysis_tripdata_1501 tm 1701.dta"


*************************************
*** A. Define additional variables
*************************************
* 0. Generate days after coaching

* 0a. Generic dummies
* postcoaching_k (k = 2..5): 1 from the k-th coaching date onwards, 0 otherwise
* (also 0 when the k-th coaching date is missing).
foreach x of numlist 2(1)5 {
gen byte postcoaching_`x'=cond(datum>=coachdatum_`x' & coachdatum_`x'!=.,1,0)
}

* Weekly event-time bins around the first coaching date, all initialised at 0.
foreach z of numlist 0(7)70 {
gen byte daysaftercoachingA`z'=0
label variable daysaftercoachingA`z' "days after coaching [non treatment specific]"
}
* This list runs downwards (70, 63, ..., 7), so the step carries its negative
* sign, which is the documented form of a descending numlist.
foreach z of numlist 70(-7)7 {
gen byte daysbeforecoachingA`z'=0
label variable daysbeforecoachingA`z' "days before coaching [non treatment specific]"
}

* Bin 0 is the day of the first coaching session itself.
bysort chauf_nr_rug: replace daysaftercoachingA0=1 if datum==coachdatum_1

* After-bin x covers the days coachdatum_1+x-6 up to and including coachdatum_1+x;
* before-bin x covers the days coachdatum_1-x up to and including coachdatum_1-x+6.
foreach x of numlist 7(7)70 {
bysort chauf_nr_rug: replace daysaftercoachingA`x'=1 if datum>coachdatum_1 + `x' - 7 & datum<=coachdatum_1 + `x'
bysort chauf_nr_rug: replace daysbeforecoachingA`x'=1 if datum<coachdatum_1 - `x' + 7 & datum>=coachdatum_1 - `x'
}

* Open-ended tails. The explicit test on coachdatum_1 is needed because an
* expression involving a missing date evaluates to missing, which compares as
* larger than any actual date.
bysort chauf_nr_rug: gen byte daysaftercoachingA70plus=cond(datum>coachdatum_1 + 70 & coachdatum_1!=.,1,0)
label variable daysaftercoachingA70plus "More than 70 days after coaching [non treatment specific]"

bysort chauf_nr_rug: gen byte daysbeforecoachingA70min=cond(datum<coachdatum_1 - 70 & coachdatum_1!=.,1,0)
label variable daysbeforecoachingA70min "More than 70 days before coaching [non treatment specific]"

* 0b. Treatment specific dummies
* Same weekly bins as the generic ones, but switched on only for drivers whose
* string variable treatment equals T1, T2, T3 or T4 respectively.
foreach t of numlist 1(1)4 {
	* Days after coaching
	foreach z of numlist 0(7)70 {
		gen byte daysaftercoaching`z'T`t'=0
		label variable daysaftercoaching`z'T`t' "days after coaching [treatment T`t']"
		}
	bysort chauf_nr_rug: replace daysaftercoaching0T`t'=1 if datum==coachdatum_1 & treatment=="T`t'"
	foreach x of numlist 7(7)70 {
		bysort chauf_nr_rug: replace daysaftercoaching`x'T`t'=1 if datum>coachdatum_1 + `x' - 7 & datum<=coachdatum_1 + `x' & treatment=="T`t'"
	}
	* Stored as byte like every other indicator in this block.
	bysort chauf_nr_rug: gen byte daysaftercoaching70T`t'plus=cond(datum>coachdatum_1 + 70 & coachdatum_1!=. & treatment=="T`t'",1 ,0)
	label variable daysaftercoaching70T`t'plus "More than 70 days after coaching [treatment T`t']"
	
	* Days before coaching
	* Descending list (70, 63, ..., 7): the step is written with its negative sign.
	foreach x of numlist 70(-7)7 {
		gen byte daysbeforecoaching`x'T`t'=0
		label variable daysbeforecoaching`x'T`t' "days before coaching [treatment T`t']"
		bysort chauf_nr_rug: replace daysbeforecoaching`x'T`t'=1 if datum<coachdatum_1 - `x' + 7 & datum>=coachdatum_1 - `x' & treatment=="T`t'"
	}
	bysort chauf_nr_rug: gen byte daysbeforecoaching70T`t'min=cond(datum<coachdatum_1 - 70 & coachdatum_1!=. & treatment=="T`t'",1,0)
	* Label the variable just created; the generic daysbeforecoachingA70min
	* already carries its own label.
	label variable daysbeforecoaching70T`t'min "More than 70 days before coaching [treatment T`t']"
}



* i. Month-of-year indicators maand_1 ... maand_12
forvalues m = 1/12 {
	gen byte maand_`m' = (month(datum) == `m')
	label variable maand_`m' "Dummy month, month_1==1 if January, etc."
}

* ii. Calendar variables derived from the trip date
gen byte maand = month(datum)
label variable maand "Month of year (1,2,..., 12)"

* dow() codes Sunday as 0, so mod(dow - 1, 7) counts the days elapsed since
* the most recent Monday; subtracting it gives the date of that Monday.
gen week = datum - mod(dow(datum) - 1, 7)
format week datum %td
label variable week "Week of year (weeks defined as starting on Monday)"

* Number of weeks elapsed since the reference Monday 29 December 2014.
gen int weekindex = round(week - date("29/12/2014", "DMY"))/7
label variable weekindex "Week of year [0, 1, ..] (week 0 starts Monday 29.12.2014)"

gen int jaar = year(datum)
label variable jaar "Year (2015, 2016, 2017)"


* iib. create "periode" variable
** "periode" only serves to compute averages over the successive experimental periods.
gen byte periode = .
label variable periode "distinguishes the different periods in time (pre-announce, post-announce, post-feedback etc.) in about monthly sections"

* Baseline: 1 within 15 Dec 2015 - 14 Jan 2016 and 0 elsewhere; the replaces
* below overwrite this value for every date range they cover.
replace periode = cond(datum>=date("15-12-2015", "DMY") & datum<date("15-1-2016", "DMY"),1,0)

* Calendar months January ... August 2015 receive -10 ... -3.
forvalues m = 1/8 {
	local nxt = `m' + 1
	replace periode = -11 + `m' if datum>=date("1-`m'-2015", "DMY") & datum<date("1-`nxt'-2015", "DMY")
}

* Irregular windows between 1 September 2015 and 15 January 2016.
replace periode = -2 if datum>=date("1-9-2015", "DMY")   & datum<date("2-10-2015", "DMY")
replace periode = -1 if datum>=date("2-10-2015", "DMY")  & datum<date("9-11-2015", "DMY")
replace periode =  0 if datum>=date("9-11-2015", "DMY")  & datum<date("15-12-2015", "DMY")
replace periode =  1 if datum>=date("15-12-2015", "DMY") & datum<date("15-1-2016", "DMY")

* Windows running from the 15th of one month to the 15th of the next during
* 2016 receive 2 ... 12.
forvalues m = 1/11 {
	local nxt = `m' + 1
	replace periode = 1 + `m' if datum>=date("15-`m'-2016", "DMY") & datum<date("15-`nxt'-2016", "DMY")
}

* Final window: 15 December 2016 up to (not including) 1 February 2017.
replace periode = 13 if datum>=date("15-12-2016", "DMY") & datum<date("1-2-2017", "DMY")


* iii. create additional bustype dummies
* Diagnostic listing: the existing bus type dummies per vehicle description.
bysort voertuig_omschrijving: summarize vdl10 vdl14 iris10

* Bus types are recognised by a substring of the vehicle description.
gen byte iris12 = (strpos(voertuig_omschrijving, "IRISBUS CITELIS 12 M") > 0)

* CNG variants: description contains "CNG" and the bus is of the given type.
gen iris12cng = (strpos(voertuig_omschrijving, "CNG") > 0 & iris12 == 1)
gen byte iris10cng = (strpos(voertuig_omschrijving, "CNG") > 0 & iris10 == 1)

gen byte vdl12 = (strpos(voertuig_omschrijving, "VDL CITEA LLE 120") > 0)

* iv. create some additional driver-specific variables, for later use in graphs
gen int gebjaar = year(verjaardag)
label variable gebjaar "Year of birth driver"
gen jaardienst = year(jaarindienst)
label variable jaardienst "Year driver entered service"

*---------------------------------------------------------------
* v. create additional weather dummies
* v.i RainFall
* Half-open bins [lo, hi): 5 mm wide below 20 mm and 10 mm wide from 20 mm up to 40 mm.
foreach lo of numlist 5 10 15 20 30 {
	local hi = cond(`lo' < 20, `lo' + 5, `lo' + 10)
	gen byte Rain`lo'_`hi'mm = (neerslagsom >= `lo' & neerslagsom < `hi')
	label variable Rain`lo'_`hi'mm "Rain fall between `lo'-`hi' millimeter"
}

* Coarser alternative: (practically) dry, light rain, and 5 mm or more.
gen byte Rain0_0mm = (neerslagsom >= 0 & neerslagsom < 0.0001)
label variable Rain0_0mm "Rain fall between 0-0 millimeter"
gen byte Rain0_5mm = (neerslagsom >= 0.0001 & neerslagsom < 5)
label variable Rain0_5mm "Rain fall between 0-5 millimeter"
gen byte Rain5_50mm = (neerslagsom >= 5 & neerslagsom < 50)
label variable Rain5_50mm "Rain fall between 5-50 millimeter"


* v.ii Temperature
* Sub-zero bins of 3 degrees: Temp3_0C is [-3, 0) and Temp6_3C is [-6, -3).
forvalues lo = 3(3)6 {
	local hi = `lo' - 3
	gen byte Temp`lo'_`hi'C = (gem_temp >= -`lo' & gem_temp < -`hi')
	label variable Temp`lo'_`hi'C "Average temperature between min. `lo'- min.`hi' Celcius"
}

* Bins of 3 degrees from 0 up to 27 degrees.
forvalues lo = 0(3)24 {
	local hi = `lo' + 3
	gen byte Temp`lo'_`hi'C = (gem_temp >= `lo' & gem_temp < `hi')
	label variable Temp`lo'_`hi'C "Average temperature between `lo'-`hi' Celcius"
}

* Coarser alternative: everything below 5 degrees in one bin ...
gen byte Temp10_5C = (gem_temp < 5)
label variable Temp10_5C "Average temperature between min. 10- plus 5 Celcius"

* ... followed by bins of 5 degrees from 5 up to 25 degrees.
forvalues lo = 5(5)20 {
	local hi = `lo' + 5
	gen byte Temp`lo'_`hi'C = (gem_temp >= `lo' & gem_temp < `hi')
	label variable Temp`lo'_`hi'C "Average temperature between `lo'-`hi' Celcius"
}


* v.iii Wind
* Half-open bins of 2 m/s from 0 up to 10 m/s.
foreach x of numlist 0(2)8 {
local y=`x'+2
gen byte Wind`x'_`y'ms=cond(gem_wind>=`x' & gem_wind<`y',1,0)
label variable Wind`x'_`y'ms "Wind speed between `x'-`y' meter/second"
}
* Top bin without upper bound. Stata ranks missing values above every number,
* so a bare gem_wind>=10 would also flag observations without wind data; the
* extra condition keeps them at 0, like in all bounded bins.
gen byte Wind10_20ms=cond(gem_wind>=10 & gem_wind<.,1,0)
label variable Wind10_20ms "Wind speed between 10-20 meter/second"


* Coarser alternative bins (note: there is no bin for 3 to 4 m/s).
gen byte Wind0_3ms=cond(gem_wind>=0 & gem_wind<3,1,0)
label variable Wind0_3ms "Wind speed between 0-3 meter/second"
gen byte Wind4_5ms=cond(gem_wind>=4 & gem_wind<5,1,0)
label variable Wind4_5ms "Wind speed between 4-5 meter/second"
gen byte Wind5_8ms=cond(gem_wind>=5 & gem_wind<8,1,0)
label variable Wind5_8ms "Wind speed between 5-8 meter/second"
gen byte Wind8_20ms=cond(gem_wind>=8 & gem_wind<20,1,0)
label variable Wind8_20ms "Wind speed between 8-20 meter/second"

* v.iv The relation between fuel economy and the number of passengers is
* non-linear, so the check-in count enters in logs.
gen lnovcheckins = ln(ovcheckins)
label variable lnovcheckins "ln(ovcheckins)"

* Flag observations for which the check-in count equals the system missing value.
gen byte ovcheckinsmissing = (ovcheckins == .)
label variable ovcheckinsmissing "=1 if ov checkin information is missing, 0 otherwise"

* vi. How long has the driver been working on a given day? [Code for the Slacking-paper]
* Earliest planned departure and latest planned arrival per driver and day.
bysort chauf_nr_rug datum: egen double startdag = min(geplande_begintijd_rit)
bysort chauf_nr_rug datum: egen double einddag = max(geplande_eindtijd_rit)
* Midpoint of the planned trip.
gen double midtijd = (geplande_eindtijd_rit + geplande_begintijd_rit)/2
format startdag einddag midtijd %tc
label variable startdag "Time driver is expected to start first trip on a given day"
label variable einddag "Time driver is expected to have completed final trip on a given day"	
label variable midtijd "Time halfway planned starting time and end time trip"	

* The variables above are displayed as %tc datetimes (milliseconds), so
* dividing the difference by 3,600,000 converts it to hours.
gen tijdonderweg = (midtijd - startdag)/3600000
label variable tijdonderweg "Time in hours that driver is working"

* Breaks: gap between the start of a trip and the end of the preceding trip of
* the same driver on the same day. The first trip of a day has no predecessor
* and therefore gets a missing value.
sort chauf_nr_rug datum geplande_begintijd_rit
by chauf_nr_rug datum: gen double geplandepauze = minutes(geplande_begintijd_rit - geplande_eindtijd_rit[_n-1])
label variable geplandepauze "Break before start ride according to schedule [in minutes]"
by chauf_nr_rug datum: gen double werkelijkepauze = minutes(werkelijke_begintijd_rit - werkelijke_eindtijd_rit[_n-1])
label variable werkelijkepauze "Actual break before start ride [in minutes]"

* Exploratory plot for a single driver: planned against actual break.
scatter geplandepauze werkelijkepauze if chauf_nr_rug==1 & werkelijkepauze>=0, ms(Oh) msize(vtiny) name(plannedvsactualbreak, replace)

*---------------------------------------------------------------
* Treatment-arm indicators for T2, T3 and T4.
forvalues k = 2/4 {
	gen byte T`k' = (treatment == "T`k'")
	label variable T`k' "Dummy; =1 if treatment groep is T`k'"
}

* Interactions of the treatment arm with the post-feedback period.
forvalues k = 2/4 {
	gen byte postfeedbackxT`k' = postfeedback * T`k'
	label variable postfeedbackxT`k' "Dummy; =1 if treatment groep is T`k' and observation is in postfeedback period"
}

* Interactions of the treatment arm with the post-experimental period.
forvalues k = 2/4 {
	gen byte postexperimentxT`k' = postexperiment * T`k'
	label variable postexperimentxT`k' "Dummy; =1 if treatment groep is T`k' and observation is in postexperimental period"
}

* Calendar-based feedback round indicators; each window is half-open [start, end).
gen byte feedround_min2 = (datum >= date("2-10-2015", "DMY") & datum < date("9-11-2015", "DMY"))
gen byte feedround_min1 = (datum >= date("9-11-2015", "DMY") & datum < date("15-12-2015", "DMY"))
gen byte feedround_0 = (datum >= date("15-12-2015", "DMY") & datum < date("15-1-2016", "DMY"))

* Rounds 1 ... 11 run from the 15th of month r to the 15th of month r+1 in 2016.
forvalues r = 1/11 {
	local nxt = `r' + 1
	gen byte feedround_`r' = (datum >= date("15-`r'-2016", "DMY") & datum < date("15-`nxt'-2016", "DMY"))
}

gen byte feedround_12 = (datum >= date("15-12-2016", "DMY") & datum < date("15-1-2017", "DMY"))

* Move the bus type dummies to the end of the variable list.
order vdl10 vdl12 vdl14 iris10 iris10cng iris12 iris12cng intouro, last

* The dependent variable gets the dep_ prefix.
rename fuel_economy dep_fueleconomy
** Account for "MPG illusion": express fuel use per distance as 100 divided by
** the original measure.
gen dep_fueleconomyLpKM = 100/dep_fueleconomy
label variable  dep_fueleconomyLpKM "Fuel economy as: Liters of fuel per 100km distance"


qui compress
** SAVE RESULTING FILE **
* Forward slashes work as directory separator on every platform Stata runs on.
save "$filepath/ConstructedData/DataMainAnalysisFR.dta", replace
**

** Check: Is there a unique 1-to-1 relation between line number and route?
* This reduces the data in memory to one row per line/route combination; the
* full data set has already been saved above.
duplicates drop lijn_nr heen_van heen_naar, force
sort lijn_nr
list lijn_nr heen_van heen_naar
* Yes, there is. This implies we can include line-number dummies without further amends.

graph close

********************************************************************************
*** DATA ZUID HOLLAND
********************************************************************************
clear all
* Forward slashes work as directory separator on every platform Stata runs on.
use "$filepath/ConstructedData/analysis_ZHDAV_1507 tm 1712.dta"

*************************************
*** A. Define additional variables
*************************************

/*** 1. Define additional variables ***/

*** 1.1 Define driver-specific days before and after coaching

* Accounting for additional coaching sessions:
* postcoaching_k (k = 2..5) is 1 from the k-th coaching date onwards and 0
* otherwise (also 0 when the k-th coaching date is missing).
foreach x of numlist 2(1)5 {
gen byte postcoaching_`x'=cond(datum>=coachdatum_`x' & coachdatum_`x'!=.,1,0)
}

* Days/weeks after coaching
foreach z of numlist 0(7)70 {
gen byte daysaftercoachingA`z'=0
label variable daysaftercoachingA`z' "days after coaching [non treatment specific]"
}

* Days/weeks before coaching
* Descending list (70, 63, ..., 7): the step is written with its negative
* sign, which is the documented form of a descending numlist.
foreach z of numlist 70(-7)7 {
gen byte daysbeforecoachingA`z'=0
label variable daysbeforecoachingA`z' "days before coaching [non treatment specific]"
}

* Bin 0 is the day of the first coaching session itself.
bysort driverid: replace daysaftercoachingA0=1 if datum==coachdatum_1


* Determine driver-specific ten-week interval before and after coaching
* After-bin x covers the days coachdatum_1+x-6 up to and including coachdatum_1+x;
* before-bin x covers the days coachdatum_1-x up to and including coachdatum_1-x+6.
foreach x of numlist 7(7)70 {
bysort driverid: replace daysaftercoachingA`x'=1 if datum>coachdatum_1 + `x' - 7 & datum<=coachdatum_1 + `x'
bysort driverid: replace daysbeforecoachingA`x'=1 if datum<coachdatum_1 - `x' + 7 & datum>=coachdatum_1 - `x'
}
* Open-ended tails, stored as byte like the other indicators.
bysort driverid: gen byte daysaftercoachingA70plus=cond(datum>coachdatum_1 + 70 & coachdatum_1!=.,1,0)
bysort driverid: gen byte daysbeforecoachingA70min=cond(datum<coachdatum_1 - 70 & coachdatum_1!=.,1,0)

label variable daysaftercoachingA70plus "More than 70 days after coaching [non treatment specific]"
label variable daysbeforecoachingA70min "More than 70 days before coaching [non treatment specific]"


*** 1.3 Month-of-year indicators maand_1 ... maand_12
forvalues m = 1/12 {
	gen byte maand_`m' = (month(datum) == `m')
	label variable maand_`m' "Dummy month, month_1==1 if January, etc."
}

*** 1.4 Calendar variables derived from the trip date
gen byte maand = month(datum)
label variable maand "Month of year (1,2,..., 12)"

* dow() codes Sunday as 0, so mod(dow - 1, 7) counts the days elapsed since
* the most recent Monday; subtracting it gives the date of that Monday.
gen week = datum - mod(dow(datum) - 1, 7)
format week datum %td
label variable week "Week of year (weeks defined as starting on Monday)"

* Number of weeks elapsed since the reference Monday 29 December 2014.
gen int weekindex = round(week - date("29/12/2014", "DMY"))/7
label variable weekindex "Week of year [0, 1, ..] (week 0 starts Monday 29.12.2014)"

gen int jaar = year(datum)
label variable jaar "Year (2015, 2016, 2017)"

*** 1.4b. create "periode" variable
** "periode" only serves to compute averages that line up with the experimental periods in FR.
gen byte periode = .
label variable periode "distinguishes the different periods in time (pre-announce, post-announce, post-feedback etc.) in about monthly sections"

* Baseline: 1 within 15 Dec 2015 - 14 Jan 2016 and 0 elsewhere; the replaces
* below overwrite this value for every date range they cover.
replace periode = cond(datum>=date("15-12-2015", "DMY") & datum<date("15-1-2016", "DMY"),1,0)

* Calendar months January ... August 2015 receive -10 ... -3.
forvalues m = 1/8 {
	local nxt = `m' + 1
	replace periode = -11 + `m' if datum>=date("1-`m'-2015", "DMY") & datum<date("1-`nxt'-2015", "DMY")
}

* Irregular windows between 1 September 2015 and 15 January 2016.
replace periode = -2 if datum>=date("1-9-2015", "DMY")   & datum<date("2-10-2015", "DMY")
replace periode = -1 if datum>=date("2-10-2015", "DMY")  & datum<date("9-11-2015", "DMY")
replace periode =  0 if datum>=date("9-11-2015", "DMY")  & datum<date("15-12-2015", "DMY")
replace periode =  1 if datum>=date("15-12-2015", "DMY") & datum<date("15-1-2016", "DMY")

* Windows running from the 15th of one month to the 15th of the next during
* 2016 receive 2 ... 12.
forvalues m = 1/11 {
	local nxt = `m' + 1
	replace periode = 1 + `m' if datum>=date("15-`m'-2016", "DMY") & datum<date("15-`nxt'-2016", "DMY")
}
replace periode = 13 if datum>=date("15-12-2016", "DMY") & datum<date("1-2-2017", "DMY")

* Calendar months of 2017 receive 14 ... 25. January 2017 was set to 13 just
* above and is overwritten with 14 here. The upper bound of each window is
* inclusive, but the first day of the following month is reassigned by the
* next iteration (or, for 1 December, by the final replace below).
forvalues m = 1/11 {
	local nxt = `m' + 1
	replace periode = 13 + `m' if datum>=date("1-`m'-2017", "DMY") & datum<=date("1-`nxt'-2017", "DMY")
}
replace periode = 25 if datum>=date("1-12-2017", "DMY") & datum<date("1-1-2018", "DMY")

*** 1.4 Create extra bus type dummy variables
* Bus types are recognised by a substring of the vehicle description.
gen byte iris12 = (strpos(voertuig_omschrijving, "IRISBUS CITELIS 12 M") > 0)
gen intouro = (strpos(voertuig_omschrijving, "INTOURO") > 0)

* CNG variants: description contains "CNG" and the bus is of the given type.
gen iris12cng = (strpos(voertuig_omschrijving, "CNG") > 0 & iris12 == 1)
gen byte iris10cng = (strpos(voertuig_omschrijving, "CNG") > 0 & iris10 == 1)


*** 1.5 create some additional driver-specific variables, for later use in graphs
* The year variables are routed through a date (1 February of that year) and
* converted back with year().
gen int gebjaar = year(mdy(2, 1, birthyear))
label variable gebjaar "Year of birth driver"
gen int jaardienst = year(mdy(2, 1, employmentyear))
label variable jaardienst "Year driver entered service"

*** 1.6 Create additional weather dummies

* Rainfall
* Half-open bins [lo, hi): 5 mm wide below 20 mm and 10 mm wide from 20 mm up to 40 mm.
foreach lo of numlist 5 10 15 20 30 {
	local hi = cond(`lo' < 20, `lo' + 5, `lo' + 10)
	gen byte Rain`lo'_`hi'mm = (neerslagsom >= `lo' & neerslagsom < `hi')
	label variable Rain`lo'_`hi'mm "Rain fall between `lo'-`hi' millimeter"
}

* Coarser alternative: (practically) dry, light rain, and 5 mm or more.
gen byte Rain0_0mm = (neerslagsom >= 0 & neerslagsom < 0.0001)
label variable Rain0_0mm "Rain fall between 0-0 millimeter"
gen byte Rain0_5mm = (neerslagsom >= 0.0001 & neerslagsom < 5)
label variable Rain0_5mm "Rain fall between 0-5 millimeter"
gen byte Rain5_50mm = (neerslagsom >= 5 & neerslagsom < 50)
label variable Rain5_50mm "Rain fall between 5-50 millimeter"

* Temperature
* Sub-zero bins of 3 degrees: Temp3_0C is [-3, 0) and Temp6_3C is [-6, -3).
forvalues lo = 3(3)6 {
	local hi = `lo' - 3
	gen byte Temp`lo'_`hi'C = (gem_temp >= -`lo' & gem_temp < -`hi')
	label variable Temp`lo'_`hi'C "Average temperature between min. `lo'- min.`hi' Celcius"
}

* Bins of 3 degrees from 0 up to 27 degrees.
forvalues lo = 0(3)24 {
	local hi = `lo' + 3
	gen byte Temp`lo'_`hi'C = (gem_temp >= `lo' & gem_temp < `hi')
	label variable Temp`lo'_`hi'C "Average temperature between `lo'-`hi' Celcius"
}

* Coarser alternative: everything below 5 degrees in one bin ...
gen byte Temp10_5C = (gem_temp < 5)
label variable Temp10_5C "Average temperature between min. 10- plus 5 Celcius"

* ... followed by bins of 5 degrees from 5 up to 25 degrees.
forvalues lo = 5(5)20 {
	local hi = `lo' + 5
	gen byte Temp`lo'_`hi'C = (gem_temp >= `lo' & gem_temp < `hi')
	label variable Temp`lo'_`hi'C "Average temperature between `lo'-`hi' Celcius"
}

* Wind
* Half-open bins of 2 m/s from 0 up to 10 m/s.
foreach x of numlist 0(2)8 {
local y=`x'+2
gen byte Wind`x'_`y'ms=cond(gem_wind>=`x' & gem_wind<`y',1,0)
label variable Wind`x'_`y'ms "Wind speed between `x'-`y' meter/second"
}
* Top bin without upper bound. Stata ranks missing values above every number,
* so a bare gem_wind>=10 would also flag observations without wind data; the
* extra condition keeps them at 0, like in all bounded bins.
gen byte Wind10_20ms=cond(gem_wind>=10 & gem_wind<.,1,0)
label variable Wind10_20ms "Wind speed between 10-20 meter/second"


* Coarser alternative bins (note: there is no bin for 3 to 4 m/s).
gen byte Wind0_3ms=cond(gem_wind>=0 & gem_wind<3,1,0)
label variable Wind0_3ms "Wind speed between 0-3 meter/second"
gen byte Wind4_5ms=cond(gem_wind>=4 & gem_wind<5,1,0)
label variable Wind4_5ms "Wind speed between 4-5 meter/second"
gen byte Wind5_8ms=cond(gem_wind>=5 & gem_wind<8,1,0)
label variable Wind5_8ms "Wind speed between 5-8 meter/second"
gen byte Wind8_20ms=cond(gem_wind>=8 & gem_wind<20,1,0)
label variable Wind8_20ms "Wind speed between 8-20 meter/second"

*** 1.7 The relation between fuel economy and the number of passengers is
*** non-linear, so the check-in count enters in logs.

gen lnovcheckins = ln(ovcheckins)
label variable lnovcheckins "ln(ovcheckins)"

* Flag observations for which the check-in count equals the system missing value.
gen byte ovcheckinsmissing = (ovcheckins == .)
label variable ovcheckinsmissing "=1 if ov checkin information is missing, 0 otherwise"

*** 1.8 How long has the driver been working on a given day? [Code for the Slacking-paper]
* Earliest planned departure and latest planned arrival per driver and day.
bysort driverid datum: egen double startdag = min(geplande_begintijd_rit)
bysort driverid datum: egen double einddag = max(geplande_eindtijd_rit)
* Midpoint of the planned trip.
gen double midtijd = (geplande_eindtijd_rit + geplande_begintijd_rit)/2
format startdag einddag midtijd %tc
label variable startdag "Time driver is expected to start first trip on a given day"
label variable einddag "Time driver is expected to have completed final trip on a given day"	
label variable midtijd "Time halfway planned starting time and end time trip"	

* The variables above are displayed as %tc datetimes (milliseconds), so
* dividing the difference by 3,600,000 converts it to hours.
gen tijdonderweg = (midtijd - startdag)/3600000
label variable tijdonderweg "Time in hours that driver is working"

* Breaks: gap between the start of a trip and the end of the preceding trip of
* the same driver on the same day. The first trip of a day has no predecessor
* and therefore gets a missing value.
sort driverid datum geplande_begintijd_rit
by driverid datum: gen double geplandepauze = minutes(geplande_begintijd_rit - geplande_eindtijd_rit[_n-1])
label variable geplandepauze "Break before start ride according to schedule [in minutes]"
by driverid datum: gen double werkelijkepauze = minutes(werkelijke_begintijd_rit - werkelijke_eindtijd_rit[_n-1])
label variable werkelijkepauze "Actual break before start ride [in minutes]"

* Exploratory plot for a single driver: planned against actual break.
scatter geplandepauze werkelijkepauze if driverid==1 & werkelijkepauze>=0, ms(Oh) msize(vtiny) name(plannedvsactualbreakZH, replace)


*** General feedback rounds in ZH
** Info: the kick-off event with the official announcement was on 1 December 2016.
** The first written report was distributed mid January 2017.
* Helper indicators: post_0 is 1 from 15 January 2017 onwards, and post_r
* (r = 1..11) is 1 from the 15th of month r+1 of 2017 onwards.
gen post_0 = cond(datum>=date("15-1-2017", "DMY"),1,0)

forvalues r = 1/11 {
	local mth = `r' + 1
	gen post_`r' = cond(datum>=date("15-`mth'-2017", "DMY") ,1,0)
}

* postfeedback is switched on as soon as any of the helper indicators is 1.
gen byte postfeedback = 0
forvalues r = 0/11 {
	qui replace postfeedback = 1 if post_`r'==1 & postfeedback==0
}

* Because post_0 is itself one of the helpers used above, post_0==1 implies
* postfeedback==1, so noreport is 0 for every observation.
gen byte noreport = cond(post_0==1 & postfeedback==0,1,0)
gen byte nonotification = 0
drop post_*

*** 2. Determine driver-specific feedback rounds
* -

*** 3. Determine post-announcement and post-experiment period

* postannouncement is 1 from 1 December 2016 onwards; postexperiment is 0 throughout.
gen postannouncement=cond(datum>=date("1-12-2016", "DMY"),1,0)
gen postexperiment=0

* Calendar-based feedback round indicators; each window is half-open [start, end).
gen byte feedround_min2=cond(datum>=date("1-11-2016", "DMY") & datum<date("1-12-2016", "DMY"),1,0)
gen byte feedround_min1=cond(datum>=date("1-12-2016", "DMY") & datum<date("15-1-2017", "DMY"),1,0)
gen byte feedround_0=cond(datum>=date("15-1-2017", "DMY") & datum<date("15-2-2017", "DMY"),1,0)

* feedround holds the index of the round (1..10) an observation falls in and
* stays 0 for observations outside these rounds.
gen feedround=0

* Round i (i = 1..10) runs from the 15th of month i+1 to the 15th of month i+2 of 2017.
local i=1
local j=2
local k=3
while `i'<=10 {
gen byte feedround_`i'=cond(datum>=date("15-`j'-2017", "DMY") & datum<date("15-`k'-2017", "DMY"),1,0)
* Restrict the update to observations inside round i. Without the condition
* every observation would be overwritten in each pass and end up with the
* value 10.
replace feedround=`i' if feedround_`i'==1
local i=`i'+1
local j=`i'+1
local k=`i'+2
}
* Rounds 11 and 12 are created as all-zero indicators.
gen byte feedround_11=0
gen byte feedround_12=0

* The dependent variable gets the dep_ prefix.
rename fuel_economy dep_fueleconomy
** Account for "MPG illusion"  -- for ZH fueleconomy already is in liters/100km
gen dep_fueleconomyLpKM = dep_fueleconomy
label variable  dep_fueleconomyLpKM "Fuel economy as: Liters of fuel per 100km distance"

* Region identifier for the ZH observations.
gen regio="ZH"

qui compress
** SAVE RESULTING FILE **
* Forward slashes work as directory separator on every platform Stata runs on.
save "$filepath/ConstructedData/DataMainAnalysisZH.dta", replace
**

** Check: Is there a unique 1-to-1 relation between line number and route?
* This reduces the data in memory to one row per line/route combination; the
* full data set has already been saved above.
duplicates drop lijn_nr heen_van heen_naar, force
sort lijn_nr
list lijn_nr heen_van heen_naar
* Yes, there by and large is. This implies we can include line-number dummies without further amends.

** Construct data set that merges FR and ZH data
clear all
* Forward slashes work as directory separator on every platform Stata runs on.
use "$filepath/ConstructedData/DataMainAnalysisZH.dta"
append using "$filepath/ConstructedData/DataMainAnalysisFR.dta"

** 
** Add Friesland as regio: rows that come in without a regio value are labelled FR
replace regio="FR" if regio =="" 
* Translate driverid ZH to chauf_nr_rug by adding an offset of 1000
* NOTE(review): assumes no FR chauf_nr_rug value coincides with a ZH driverid + 1000 -- TODO confirm
replace chauf_nr_rug=driverid + 1000 if chauf_nr_rug==. & regio == "ZH"
tab coachidentifier
** Two ZH coaches are stationed at the same base location and cannot be separately identified
* Their combined code is turned into a purely numeric string so that destring succeeds.
replace coachidentifier = "244358" if coachidentifier == "244/358"
destring coachidentifier, replace
* ZH coach identifiers receive the same offset of 1000, except the combined code, which is kept as is.
replace eco_coach_nr_rug=coachidentifier + 1000 if eco_coach_nr_rug==. & regio == "ZH" & coachidentifier!=244358
replace eco_coach_nr_rug=244358 if eco_coach_nr_rug==. & regio == "ZH" & coachidentifier==244358

* Copy ZH driver characteristics into the variable names used for FR.
* NOTE(review): fulltimer is only set to 1 here; ZH drivers with another contract description keep their current value.
replace fulltimer=1 if  regio == "ZH" & contractdescription == "Full-time"
** For ZH, the year but not the exact birthday or date of first employment are available.
replace standplaats=ves_naam if  regio == "ZH" & standplaats==""
replace geslacht=0 if regio == "ZH" & gender == 0
replace geslacht=1 if regio == "ZH" & gender == 1
* Missing trip characteristics are set to 0 for ZH.
replace schooltraject=0 if regio == "ZH" & schooltraject == .
replace stadsrit=0 if regio == "ZH" & stadsrit == .
* The treatment-specific coaching bins are set to 0 for all ZH observations.
forvalues arm = 1/4 {
	forvalues d = 7(7)70 {
		foreach side in after before {
			replace days`side'coaching`d'T`arm' = 0 if regio == "ZH" 
		}
	}
	foreach v in daysaftercoaching0T`arm' daysaftercoaching70T`arm'plus daysbeforecoaching70T`arm'min {
		replace `v' = 0 if regio == "ZH" 
	}
}

* Treatment-arm dummies and their interactions: fill missing values with 0 for ZH.
forvalues arm = 2/4 {
	foreach v in T`arm' postfeedbackxT`arm' postexperimentxT`arm' {
		replace `v' = 0 if regio == "ZH" & `v' == .
	}
}

* Drop variables that are not kept in the combined file.
drop contractdescription coachlogstandplaats verjaardag jaarindienst

qui compress
** SAVE RESULTING FILE **

* Forward slashes work as directory separator on every platform Stata runs on.
save "$filepath/ConstructedData/DataMainAnalysisALL.dta", replace

log close
